*This file prepares the industry level data for analysis.

set more off, permanently

* Load the 4-digit industry panel and make the Finnish ISIC code (isic_FIN)
* the working industry identifier.
use "$path/industry_data_isicfin_all4digit.dta", clear
drop isic wholeprice_index export_index costliving_index
rename isic_FIN isic

* Correct for the change in wooden shipbuilding reported in Statistics Finland
* publications, where values moved between industries in 1958.
* For every year after 1958 the amounts below are added to industry 3811 and
* subtracted from industry 3812.
* NOTE(review): the condition is year>1958, so 1958 itself is left untouched - confirm.
local shift_Labor_all        1374
local shift_gross_value_real 173203673

foreach v in Labor_all gross_value_real {
    replace `v' = `v' + `shift_`v'' if isic == "3811" & year > 1958
    replace `v' = `v' - `shift_`v'' if isic == "3812" & year > 1958
}

* Drop one industry with a drop in values.
drop if isic == "2723"

* Join with the reparations data.
* unmatched(master) keeps every industry-year of the panel (matched or not) and
* never brings in reparations rows that have no counterpart in the panel.
joinby isic year using "$path/war_reparations_isic19504digit", unmatched(master)

* Discard the join indicator and the old ind variable, then rebuild ind as a
* numeric industry identifier from the current isic codes.
drop ind _merge
egen ind = group(isic)

* Declare the industry-by-year panel and order the rows accordingly.
xtset ind year
sort ind year

* Generate the sum of total reparations for each industry (constant within ind).
* total() is the documented name of the egen function formerly called sum().
bys ind: egen repa_tot = total(reparations_real_main)

* Post-period indicator: 1 for years after 1943, 0 otherwise.
* Stata treats a missing value as larger than any number, so a bare (year>1943)
* would code a missing year as post==1; the explicit !missing() keeps such rows at 0.
gen post = (year > 1943 & !missing(year))

* Three-digit industry code (first three characters of isic) and its numeric group id.
gen isic_3 = substr(isic, 1, 3)

egen three_d = group(isic_3)

* Create the building blocks for the control variables.

* Numeric two-digit and one-digit sector codes taken from the leading characters of isic.
gen digt2 = substr(isic, 1, 2)
destring digt2, replace
gen digt1 = substr(isic, 1, 1)
destring digt1, replace

* Year indicators ydum1934 ... ydum1970 (1 in the named year, 0 otherwise).
forvalues yr = 1934/1970 {
    gen ydum`yr' = (year == `yr')
}

* Ratios.
gen power_labor_share   = power / (Labor + Labor_other)
gen skilled_labor_share = Labor_other / (Labor + Labor_other)
gen relative_wage       = wages_real / Labor
gen productivity        = gross_value_real / Labor

* Natural logs. Inputs and establishments are shifted by 1 before the log.
gen lnwage            = ln(wages_real)
gen lnlabor_all       = ln(Labor_all)
gen ln_value_prod     = ln(gross_value_real)
gen ln_relative_wage  = ln(relative_wage)
gen ln_valueadded     = ln(value_added)
gen ln_inputs_all     = ln(Inputs_all_real + 1)
gen ln_establishments = ln(Establishments + 1)
gen ln_productivity   = ln(gross_value_real / Labor)
gen ln_power          = ln(power)

* For each variable below and each year 1934-1943, create an industry-constant
* copy of that year's value, named <stub><year> (e.g. ln_value_prod1943).
* The value is picked out with an if-condition and then spread to every row of
* the industry via egen mean(), which ignores the missing rows.
* The two lists are parallel: the i-th source variable is stored under the i-th stub.
local sources ln_value_prod gross_value_real lnlabor_all Labor_all ln_relative_wage skilled_labor_share power_labor_share ln_productivity ln_valueadded ln_establishments ln_inputs_all
local stubs   ln_value_prod value_prod lnlabor_all Labor_all ln_relative_wage skilled power_labor_share ln_productivity ln_valueadded ln_establishments ln_inputs_all

local n_vars : word count `sources'

forvalues i = 1/`n_vars' {
    local src  : word `i' of `sources'
    local stub : word `i' of `stubs'

    forvalues yr = 1934/1943 {
        tempvar only_yr
        bysort isic: gen `only_yr' = `src' if year == `yr'
        egen `stub'`yr' = mean(`only_yr'), by(isic)
        drop `only_yr'
    }
}

* Full control set: each 1943 baseline characteristic interacted with year
* (c. marks the baseline variable as continuous in the factor-variable interaction).
* Of the 1943 baselines created above, ln_productivity1943, value_prod1943 and Labor_all1943 are not included.
global controls43 c.power_labor_share1943#year    ///
c.ln_value_prod1943#year  c.lnlabor_all1943#year c.ln_relative_wage1943#year c.ln_establishments1943#year c.skilled1943#year c.ln_inputs_all1943#year c.ln_valueadded1943#year 
 
 
* Create treatment variables.

* Established industries: two-digit sectors 25 and 27 are flagged as timber/paper.
generate timberpaper    = inlist(digt2, 25, 27)
generate nottimberpaper = !timberpaper

* The treatment measures are built on the 1943 cross-section only and stored in
* a temporary file keyed by isic; the full panel is restored afterwards.
tempfile treat
preserve
keep if year == 1943

* Reparations per worker in 1943: industry total divided by (Labor_all * 1000).
generate repa_sharel = repa_tot / (Labor_all * 1000)

* Standardized values.
egen treat_std_l = std(repa_sharel)

* NOTE(review): the two sector-specific standardizations below are not in the
* keep list further down, so they never reach the saved data - confirm whether
* they should be kept or removed.
egen treat_std_l_tp    = std(repa_sharel * timberpaper)
egen treat_std_l_nontp = std(repa_sharel * nottimberpaper)

* Linear model: log of total reparations, shifted by 1.
generate ln_repa_tot = ln(repa_tot + 1)

* High and low treatment: split industries with a positive share into two
* quantile groups (1 = lower, 2 = upper); everything else is coded 0.
xtile decile_repashare_2 = repa_sharel if repa_sharel > 0, nq(2)
replace decile_repashare_2 = 0 if missing(decile_repashare_2)

generate d2_s = (decile_repashare_2 == 2)
generate d1_s = (decile_repashare_2 == 1)

keep isic repa_sharel treat_std_l ln_repa_tot decile_repashare_2 d2_s d1_s

save `treat'
restore

* Attach the 1943-based treatment measures to every year of each industry.
* NOTE(review): joinby without unmatched() keeps matched industries only, so an
* industry with no 1943 row in the treatment file drops out here - confirm intended.
joinby isic using `treat'

* Create post interactions.
generate treat_dummy = (repa_tot > 0)
generate treatpost   = post * treat_dummy
generate highpost    = post * d2_s
generate lowpost     = post * d1_s
generate linearpost  = post * ln_repa_tot
generate scaledpost  = treat_std_l * post

label variable treatpost  "Treat x Post"
label variable highpost   "High Reparations x Post"
label variable lowpost    "Low Reparations x Post"
label variable linearpost "ln(Reparations) x Post"
label variable scaledpost "Reparations share x Post"


* Select elastic net controls.
* Candidate controls are the 1943 baseline characteristics listed in controlsset43.
global controlsset43   ln_value_prod1943 lnlabor_all1943    power_labor_share1943   ln_relative_wage1943 ln_establishments1943  skilled1943  ln_inputs_all1943  ln_valueadded1943 

* Three elastic net fits on the 1943 cross-section, one per treatment measure.
* The lists are parallel: model type, outcome and seed of the i-th fit.
* In 2023 ran in Stata 17.
local fit_model  linear linear logit
local fit_depvar repa_sharel ln_repa_tot d2_s
local fit_seed   2211 2211 2212

forvalues i = 1/3 {
    local model  : word `i' of `fit_model'
    local depvar : word `i' of `fit_depvar'
    local seed   : word `i' of `fit_seed'

    preserve
    keep if year == 1943

    set seed `seed'
    elasticnet `model' `depvar' $controlsset43

    lassoinfo
    lassocoef

    restore
}

* Control sets used downstream, each baseline interacted with year.
* These lists are typed in by hand; they are not read from the fits above.
* NOTE(review): presumably they mirror the lassocoef output - re-check if the data change.
global sharecontrols c.lnlabor_all1943#year  c.power_labor_share1943#year c.ln_relative_wage1943#year c.ln_establishments1943#year c.skilled1943#year c.ln_inputs_all1943#year c.ln_valueadded1943#year 

global linearcontrols c.lnlabor_all1943#year  c.power_labor_share1943#year c.ln_relative_wage1943#year c.ln_value_prod1943#year c.ln_establishments1943#year c.skilled1943#year c.ln_inputs_all1943#year c.ln_valueadded1943#year 

global highcontrols c.lnlabor_all1943#year  c.power_labor_share1943#year c.ln_relative_wage1943#year c.ln_establishments1943#year c.skilled1943#year c.ln_inputs_all1943#year 


* Label the variables

* Raw and deflated industry series.
label variable year "Year"
label variable isic "Industry (ISIC Code)"
label variable gross_value "Gross Value of Output"
label variable Labor "Labor (Number of Workers)"
label variable Labor_all "Total Labor"
label variable wages "Wages"
label variable power "Power usage"
label variable inputs_foreign "Foreign Inputs"
label variable Inputs_all "Total Inputs"
label variable Establishments "Establishments"
label variable gross_value_real "Real Gross Value of Output"
label variable wages_real "Real Wages"
label variable Inputs_all_real "Real Total Inputs"
label variable inputs_foreign_real "Real Foreign Inputs"
label variable Labor_other "Other Labor"
label variable value_added "Value Added"

* Reparations total, period indicator and sector codes.
label variable repa_tot "Total Reparations"
label variable post "Post-Treatment Indicator"
label variable isic_3 "3-Digit ISIC Code"
label variable three_d "Three-Digit Sector Code"
label variable digt2 "Two-Digit Sector Code"
label variable digt1 "One-Digit Sector Code"

* Year dummies ydum1934 ... ydum1970.
forvalues yr = 1934/1970 {
    label variable ydum`yr' "Year Dummy `yr'"
}

* Ratios and logs.
label variable power_labor_share "Power to Labor Share"
label variable skilled_labor_share "Skilled Labor Share"
label variable relative_wage "Mean Wage"
label variable productivity "Productivity"
label variable lnwage "Log of Wages"
label variable lnlabor_all "Log of Total Labor"
label variable ln_value_prod "Log of Value Produced"
label variable ln_relative_wage "Log of Relative Wage"
label variable ln_valueadded "Log of Value Added"
label variable ln_inputs_all "Log of Total Inputs"
label variable ln_establishments "Log of Establishments"
label variable ln_productivity "Log of Productivity"
label variable ln_power "Log of Power Consumption"

* Industry-constant baseline values for each year 1934-1943.
forvalues yr = 1934/1943 {
    label variable ln_value_prod`yr' "Log of Value Produced in `yr'"
    label variable value_prod`yr' "Value Produced in `yr'"
    label variable lnlabor_all`yr' "Log of Total Labor in `yr'"
    label variable Labor_all`yr' "Total Labor in `yr'"
    label variable ln_relative_wage`yr' "Log of Relative Wage in `yr'"
    label variable skilled`yr' "Skilled Labor in `yr'"
    label variable power_labor_share`yr' "Power to Labor Share in `yr'"
    label variable ln_productivity`yr' "Log of Productivity in `yr'"
    label variable ln_valueadded`yr' "Log of Value Added in `yr'"
    label variable ln_establishments`yr' "Log of Establishments in `yr'"
    label variable ln_inputs_all`yr' "Log of Total Inputs in `yr'"
}
* Labels for the sector indicators and treatment measures.
label variable timberpaper "Timber and Paper Indicator"
label variable nottimberpaper "Non-Timber and Paper Indicator"
label variable repa_sharel "Reparations Share"
label variable treat_std_l "Standardized Exposure"
label variable ln_repa_tot "Log of Total Reparations"
* decile_repashare_2 comes from xtile with nq(2), i.e. a two-group (median) split
* of industries with a positive reparations share, with 0 for all other industries.
* The labels therefore describe low/high halves rather than deciles.
label variable decile_repashare_2 "Reparations Share Group (0 = none, 1 = low, 2 = high)"
label variable d2_s "High Reparations Share Indicator (upper half)"
label variable d1_s "Low Reparations Share Indicator (lower half)"
label variable treat_dummy "Treatment Dummy"


**Save data for analysis
* Writes the prepared industry panel to $path/industry_data.dta; replace overwrites
* an existing file of that name. The global $path must be defined before this script runs.

saveold  "$path/industry_data.dta", replace
